# """Utils for language models."""

# import re


# # ------------------------
# # utils for shakespeare dataset

# # Vocabulary re-used from the Federated Learning for Text Generation tutorial.
# # https://www.tensorflow.org/federated/tutorials/federated_learning_for_text_generation
# CHAR_VOCAB = list(
#     'dhlptx@DHLPTX $(,048cgkoswCGKOSW[_#\'/37;?bfjnrvzBFJNRVZ"&*.26:\naeimquyAEIMQUY]!%)-159\r'
# )

# # ALL_LETTERS = "\n !\"&'(),-.0123456789:;>?ABCDEFGHIJKLMNOPQRSTUVWXYZ[]abcdefghijklmnopqrstuvwxyz}"
# ALL_LETTERS = "".join(CHAR_VOCAB)

# # Vocabulary with OOV ID, zero for the padding, and BOS, EOS IDs.
# VOCAB_SIZE = len(ALL_LETTERS) + 4

# def _one_hot(index, size):
#     '''returns one-hot vector with given size and value 1 at given index
#     '''
#     vec = [0 for _ in range(size)]
#     vec[int(index)] = 1
#     return vec


# def letter_to_vec(letter):
#     '''returns one-hot representation of given letter
#     '''
#     index = ALL_LETTERS.find(letter)
#     return _one_hot(index, VOCAB_SIZE)

# def letter_to_index(letter):
#     '''returns the index of the given letter in ALL_LETTERS (-1 if not found)
#     '''
#     index = ALL_LETTERS.find(letter)
#     return index

# def word_to_indices(word):
#     '''returns a list of character indices

#     Args:
#         word: string

#     Return:
#         indices: int list with length len(word)
#     '''
#     indices = []
#     for c in word:
#         indices.append(ALL_LETTERS.find(c))
#     return indices


# # ------------------------
# # utils for sent140 dataset


# def split_line(line):
#     '''split given line/phrase into list of words

#     Args:
#         line: string representing phrase to be split

#     Return:
#         list of strings, with each string representing a word
#     '''
#     return re.findall(r"[\w']+|[.,!?;]", line)


# def _word_to_index(word, indd):
#     '''returns index of given word based on given lookup dictionary

#     returns the length of the lookup dictionary if word not found

#     Args:
#         word: string
#         indd: dictionary with string words as keys and int indices as values
#     '''
#     if word in indd:
#         return indd[word]
#     else:
#         return len(indd)


# def line_to_indices(line, word2id, max_words=25):
#     '''converts given phrase into list of word indices

#     if the phrase has more than max_words words, returns a list containing
#     indices of the first max_words words
#     if the phrase has fewer than max_words words, repeatedly appends the
#     integer representing the unknown-word index to the returned list until
#     the list's length is max_words

#     Args:
#         line: string representing phrase/sequence of words
#         word2id: dictionary with string words as keys and int indices as values
#         max_words: maximum number of word indices in returned list

#     Return:
#         indl: list of word indices, one index for each word in phrase
#     '''
#     unk_id = len(word2id)
#     line_list = split_line(line) # split phrase in words
#     indl = [word2id[w] if w in word2id else unk_id for w in line_list[:max_words]]
#     indl += [unk_id]*(max_words-len(indl))
#     return indl


# def bag_of_words(line, vocab):
#     '''returns bag of words representation of given phrase using given vocab

#     Args:
#         line: string representing phrase to be parsed
#         vocab: dictionary with words as keys and indices as values

#     Return:
#         integer list
#     '''
#     bag = [0]*len(vocab)
#     words = split_line(line)
#     for w in words:
#         if w in vocab:
#             bag[vocab[w]] += 1
#     return bag
